import tensorflow as tf
from tensorflow import keras, losses
import pandas as pd

from tensorflow_demo.chapter6_8.NetWork import Network

layers = keras.layers
# Make float64 the default floating-point type used by Keras.
tf.keras.backend.set_floatx('float64')

# Download the Auto MPG data set and keep the path of the local copy.
dataset_path = keras.utils.get_file(
    fname='auto-mpg.data',
    origin='http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data',
)
print(dataset_path)

# Column names, in file order: fuel efficiency (MPG, miles per gallon),
# cylinders, displacement, horsepower, weight, acceleration, model year
# and origin.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
    'Acceleration', 'Model Year', 'Origin',
]
raw_dataset = pd.read_csv(
    dataset_path,
    sep=" ",
    skipinitialspace=True,
    names=column_names,
    na_values="?",   # '?' is read as a missing value
    comment='\t',    # the rest of a line, from the first tab on, is ignored
)
dataset = raw_dataset.copy()
# Peek at the data:
# print(dataset.head())

# Per-column count of missing values and row count before cleaning.
a = dataset.isna().sum()
print(dataset.shape[0])

# Drop every row that contains a missing value (the original note says this
# removes 6 rows), then recompute the counts and print the new row count.
dataset = dataset.dropna()
a = dataset.isna().sum()
print(dataset.shape[0])

# Replace the categorical 'Origin' code with three 0.0/1.0 indicator columns.
origin = dataset.pop('Origin')
dataset = dataset.assign(
    USA=origin.eq(1) * 1.0,
    Europe=origin.eq(2) * 1.0,
    Japan=origin.eq(3) * 1.0,
)
# NOTE: the result of this call is discarded; it only displays something in
# an interactive session.
dataset.tail()
# print(dataset.head())

# Hold out 20% of the rows for testing: sample 80% for training with a fixed
# seed, and use every row that was not sampled as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
# print(train_dataset.index)
test_dataset = dataset.drop(index=train_dataset.index)
# print(len(test_dataset.index))

# Separate the regression target ('MPG') from the input features.
train_labels, test_labels = train_dataset.pop('MPG'), test_dataset.pop('MPG')

# Summary statistics of the training features, transposed so that each row is
# one feature and the columns include 'mean' and 'std' (standard deviation).
# 'MPG' has already been popped above, so it does not appear here.
train_stats = train_dataset.describe().T


# print(train_stats)

# Standardize the data.
def norm(x, stats=None):
    """Standardize *x* using per-feature mean and standard deviation.

    Args:
        x: Feature table (e.g. a DataFrame) whose column labels match the
            index labels of ``stats``.
        stats: Table with ``'mean'`` and ``'std'`` columns, one row per
            feature. When omitted, the module-level ``train_stats`` is used,
            so existing ``norm(x)`` calls behave exactly as before.

    Returns:
        ``(x - mean) / std``, computed feature by feature.
    """
    if stats is None:
        # Default to the statistics of the training split so that train and
        # test data are scaled with the same parameters.
        stats = train_stats
    return (x - stats['mean']) / stats['std']


# Scale both splits with the statistics of the training split.
normed_train_data, normed_test_data = (
    norm(split) for split in (train_dataset, test_dataset)
)

# Report the sizes of the training and test sets.
for features, labels in ((normed_train_data, train_labels),
                         (normed_test_data, test_labels)):
    print(features.shape, labels.shape)
print(normed_train_data)
# print(normed_test_data)
# print(train_stats['mean'])

# Wrap (features, label) pairs in a tf.data.Dataset, shuffle with a buffer of
# 100 elements and group the samples into batches of 32.
train_db = (
    tf.data.Dataset
    .from_tensor_slices((normed_train_data.values, train_labels.values))
    .shuffle(100)
    .batch(32)
)
print(train_db)

model = Network()  # instantiate the project-defined network
# Create the network's weights up front. The leading 4 is an arbitrary
# placeholder batch size; 9 is the number of input features per sample.
model.build(input_shape=(4, 9))
model.summary()  # print the network structure
optimizer = tf.keras.optimizers.RMSprop(0.001)  # optimizer with learning rate 0.001
# The list of tensors to optimize is available as model.trainable_variables:
# print(model.trainable_variables)

for epoch in range(200):  # 200 epochs
    # One pass over the training set; with a batch size of 32 there are
    # ceil(num_samples / 32) steps per epoch.
    for step, (x, y) in enumerate(train_db):
        # Record only the forward pass and the loss on the tape.
        with tf.GradientTape() as tape:
            out = model(x)  # forward pass
            # The labels are 1-D (one value per sample). NOTE(review): Network
            # is defined elsewhere; if it emits shape (batch, 1), subtracting
            # 1-D labels would broadcast to (batch, batch) and the loss would
            # compare every prediction with every label. Giving the labels
            # the prediction's shape avoids that and changes nothing when the
            # shapes already agree.
            y = tf.reshape(y, tf.shape(out))
            loss = tf.reduce_mean(losses.MSE(y, out))  # mean squared error

        # MAE is only reported, never differentiated, so it is computed
        # outside the tape.
        mae_loss = tf.reduce_mean(losses.MAE(y, out))
        if step % 10 == 0:  # log the training error every 10 steps
            print(epoch, step, float(mae_loss))

        # Compute the gradients and update the parameters outside the tape
        # context so that these operations are not part of the recorded
        # computation.
        grads = tape.gradient(loss, model.trainable_variables)
        # The optimizer applies its update rule (conceptually
        # w' = w - lr * grad) to each (gradient, variable) pair.
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
print(model.trainable_variables)
